
/*******************************************************************************/
/*  
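	This script uses O*NET and OES data to create our LWFH and HPP measures
	Download here: https://www.bls.gov/tus/datafiles-2018.htm
	(NOTE: the link above is the BLS time-use (ATUS) data page; confirm the O*NET/OES download source)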
    --
	Alexander Weinberg
	February 16, 2021
*/
/*******************************************************************************/

//________________________________________________________________
// Combine the cleaned national OES file with the cleaned O*NET file
use "../Data/OES/oes_nat_clean.dta", clear
merge m:1 oes2019 using "../Data/ONET/onet_clean"

//________________________________________________________________
// Report how many OES occupations found no O*NET match (master-only rows)
quietly count if _merge == 1
display as text "Num. of OES occs which don't merge = " as result `r(N)'

//________________________________________________________________
// List the unmatched OES codes; these are typically "misc." occupations
// (codes ending with the digit 9)
tabulate oes2019 if _merge == 1

// Employment, in millions, of OES occupations without O*NET variables
quietly summarize tot_emp if _merge == 1
generate aux1_nat = `r(sum)' / 1000000
display as text "Emp. (millions) in national OES which doesn't have O*NET vars = " as result aux1_nat

// Employment, in millions, of OES occupations that do have O*NET variables
quietly summarize tot_emp if _merge == 3
generate aux2_nat = `r(sum)' / 1000000
display as text "Emp. (millions) in nat OES which does have O*NET vars = " as result aux2_nat

// The aux variables only exist to feed the two messages above
drop _merge aux?_nat

//________________________________________________________________
// COLLAPSE TO OCC CODES
// Attach census OCC codes through the crosswalk, then aggregate to one row per
// occ: employment-weighted means of the O*NET variables, total employment, and
// the first title / digit codes within each occ.
// NOTE(review): rows that do not match the crosswalk are not dropped before
// the collapse -- confirm this is intended.
merge m:1 oes2019 using "../Raw Data/Crosswalks/oessoc2010_to_occuh_xwalk.dta"
collapse (mean) _wa_* _wc_* (rawsum) tot_emp (first) occtitle occ_?digit [aw=tot_emp], by(occ) fast

//________________________________________________________________
// Prep for WFH and PP measures
rename _wc_Physical_Proximit_4C2a3 pp
label variable pp "Physical proximity, continuous"

// Flip the e-mail item so that a larger value indicates less e-mail (= harder to WFH)
replace _wc_Electronic_Mail_4C1a2h = 5 - _wc_Electronic_Mail_4C1a2h + 1 


/*----------------------------------------------------*/
			/* [>   WFH   <] */ 
/*----------------------------------------------------*/


//________________________________________________________________
// Binary variable if raw O*NET var is >= 3.5
// Stata orders missing values above every number, so an unguarded
// (`vv' >= 3.5) evaluates to 1 for a missing value. Restricting the replace
// to non-missing values keeps missings missing, so the sum of indicators is
// missing too and such occupations are removed by the mi(low_wfh) drop below
// instead of being silently counted as "hard to work from home".
foreach vv of varlist _wc_* _wa_* {
	replace `vv' = (`vv' >= 3.5) if !mi(`vv')
}

//________________________________________________________________
// Create the Low WFH variable
// low_wfh is the count of the 17 binary indicators listed below, so it takes
// values in {0, ..., 17}; it is missing if any indicator is missing.
local lwfh_items ///
	_wc_Electronic_Mail_4C1a2h   _wc_Outdoors_Exposed_4C2a1c  _wc_Outdoors_Under_Co_4C2a1d ///
	_wc_Deal_With_Physica_4C1d3  _wc_Wear_Common_Prote_4C2e1d _wc_Wear_Specialized_4C2e1e  ///
	_wc_Exposed_to_Diseas_4C2c1b _wc_Exposed_to_Minor_4C2c1f  _wc_Spend_Time_Walkin_4C2d1d ///
	_wa_Performing_Genera_4A3a1  _wa_Handling_and_Movi_4A3a2  _wa_Controlling_Machi_4A3a3  ///
	_wa_Operating_Vehicle_4A3a4  _wa_Performing_for_or_4A4a8  _wa_Repairing_and_Mai_4A3b5  ///
	_wa_Repairing_and_Mai_4A3b4  _wa_Inspecting_Equipm_4A1b2

// Accumulate the indicators one at a time (missing propagates, as in a plain sum)
gen low_wfh = 0
foreach item of local lwfh_items {
	quietly replace low_wfh = low_wfh + `item'
}

//________________________________________________________________
// Make Binary/Quartile Variables
// Occupations lacking either measure cannot be classified, so drop them first
drop if mi(low_wfh) | mi(pp)

//________________________________________________________________
// BINARY WORK-FROM-HOME 
// Employment-weighted quartile cutoffs of low_wfh. The cutoffs are held in
// temporary-named scalars rather than local macros: scalars keep the stored
// r() value as a number (no decimal round-trip through a macro string) and
// tempnames cannot collide with variable names.
tempname lwfh_p25 lwfh_p50 lwfh_p75
summ low_wfh [fw=tot_emp], detail					
scalar `lwfh_p25' = r(p25)										// Q1
scalar `lwfh_p50' = r(p50)										// median
scalar `lwfh_p75' = r(p75)										// Q3

gen low_wfh_binary = low_wfh > `lwfh_p50'
gen low_wfh_q1 	   = low_wfh <= `lwfh_p25'						// HWFH
gen low_wfh_q2 	   = low_wfh <= `lwfh_p50' & low_wfh > `lwfh_p25'
gen low_wfh_q3 	   = low_wfh <= `lwfh_p75' & low_wfh > `lwfh_p50'
gen low_wfh_q4 	   = low_wfh > `lwfh_p75'						// LWFH
display as text "LWFH Median = " as result `lwfh_p50'

/*----------------------------------------------------*/
			/* [>   PHYSICAL-PROXIMITY   <] */ 
/*----------------------------------------------------*/


//________________________________________________________________
// BINARY PHYSICAL-PROXIMITY
// Employment-weighted quartile cutoffs of pp. pp is continuous, so the cutoffs
// are kept in temporary-named scalars instead of local macros: comparing pp
// with a cutoff that went through a macro's decimal string could misclassify
// an occupation sitting exactly on the cutoff.
tempname pp_p25 pp_p50 pp_p75
summ pp [fw=tot_emp], detail					
scalar `pp_p25' = r(p25)										// Q1
scalar `pp_p50' = r(p50)										// median
scalar `pp_p75' = r(p75)										// Q3

gen pp_binary  = pp > `pp_p50'
gen pp_q1 	   = pp <= `pp_p25'									// LPP
gen pp_q2 	   = pp <= `pp_p50' & pp > `pp_p25'
gen pp_q3 	   = pp <= `pp_p75' & pp > `pp_p50'
gen pp_q4 	   = pp > `pp_p75'									// HPP
display as text "HPP Median = " as result `pp_p50'

//________________________________________________________________
// Label
// Work-from-home indicator: 0 = HWFH, 1 = LWFH
label variable low_wfh_binary "Takes a 1 if above low_WFH median"
label define lwfhlab 0 "HWFH" 1 "LWFH"
label values low_wfh_binary lwfhlab

// Physical-proximity indicator, renamed so its direction is explicit: 0 = LPP, 1 = HPP
rename pp_binary high_pp_binary
label variable high_pp_binary "Takes a 1 if above PP median"
label define hpplab 0 "LPP" 1 "HPP"
label values high_pp_binary hpplab

// Complements of the two indicators
generate high_wfh_binary = 1 - low_wfh_binary
generate low_pp_binary   = 1 - high_pp_binary

//________________________________________________________________
// Export at 3-Digit OCC level 
// Arrange columns, sort by occ, and keep only identifiers, employment and the
// derived measures (the raw _wa_* / _wc_* items are not kept)
order occ* tot_emp low_wfh* pp* 
sort  occ
keep  occ* tot_emp low_wfh* pp* *binary
compress
save "../Data/onet_occ_clean", replace

/* CSV for sharing: a reduced set of columns, written without altering the data in memory */
preserve
	keep occ occtitle low_wfh pp *_binary
	export delimited using "../Files/lwfh_pp_occ_3digit.csv", replace
restore

//________________________________________________________________
// Export at 2-Digit OCC level
// Employment-weighted means of both measures within each 2-digit group
collapse (mean) low_wfh pp (rawsum) tot_emp (first) occ_1digit [aw=tot_emp], by(occ_2digit)

//________________________________________________________________
// linearly scale 
// Min-max rescale each measure to [0,1] across the 2-digit groups
foreach measure of varlist pp low_wfh {
	summ `measure'
	local hi = r(max)
	local lo = r(min)
	replace `measure' = (`measure' - `lo') / (`hi' - `lo')
}

// High-WFH score is the mirror image of the scaled low-WFH score
gen high_wfh 		= 1 - low_wfh

// Attach the existing value labels to the collapsed digit codes
label values occ_1digit occ_1_label
label values occ_2digit occ_2_label

compress
save "../Data/2_digit_pp_wfh_onet", replace

//________________________________________________________________
// CSV for sharing
// Round to two decimals for the shared file only (the .dta above keeps full precision)
foreach measure of varlist pp low_wfh {
	replace `measure' = round(`measure', 0.01)
}
compress

keep occ_?digit low_wfh pp
export delimited "../Files/lwfh_pp_occ_2digit.csv", replace


// end


